
/* GCSx
** TOKENIZE.CPP
**
** Script tokenization (to feed to compiler)
*/

/*****************************************************************************
** Copyright (C) 2003-2006 Janson
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
** 
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
*****************************************************************************/

#include "all.h"

// Frees the heap-allocated text string of every cached token in
// [start, end) and nulls each pointer so a later delete is harmless.
void Tokenizer::deallocRange(list<Tokenizer::Token>::iterator start, list<Tokenizer::Token>::iterator end) { start_func
    while (start != end) {
        delete start->text;
        start->text = NULL;
        ++start;
    }
}

// Maps strings to tokens- derived from tokenStrings
// Shared (static) across all Tokenizer instances; built lazily by
// initTokenLookups() and released by destroyGlobals().
map<string, int>* Tokenizer::tokenLookup = NULL;

// Constructs a tokenizer over an externally-owned list of source lines.
// The source list must outlive this object; position starts at the first
// row/column with the "at new line" flag set.
Tokenizer::Tokenizer(const list<string>* src) : cached(), bookmarks() { start_func
    initTokenLookups();

    source = src;
    row = source->begin();
    rowNum = 0;
    col = 0;
    atNewLine = 1;
    nextCloseBrace = 0;
    // BUGFIX: always give rowLen a defined value- previously it was left
    // uninitialized when the source list was empty (row == end()).
    rowLen = 0;
    if (row != source->end()) rowLen = (*row).size();
    cacheRecord = 0;
    bookmarkNew = 0;
    cacheReplay = cached.end();
    nextBookmarkName = 1000;
    
    errorCount = 0;
    warningCount = 0;
    silent = 0;

    errRow = 0;
    errCol = 0;
    errBuffer = NULL;  // Allocated lazily on first error/warning
}

// Releases all heap storage owned by this tokenizer: the cached token
// text strings and the lazily-allocated error formatting buffer.
// (The source line list is externally owned and is not freed here.)
Tokenizer::~Tokenizer() { start_func
    deallocRange(cached.begin(), cached.end());
    delete[] errBuffer;
}

int Tokenizer::getBookmarkName() { start_func
    return ++nextBookmarkName;
}

// Builds the shared string->token-type map from the tokenStrings table
// on first use; subsequent calls are no-ops.
void Tokenizer::initTokenLookups() { start_func
    if (!tokenLookup) {
        tokenLookup = new map<string, int>;
        // tokenStrings is terminated by an entry with a NULL text pointer
        for (int idx = 0; tokenStrings[idx].text; ++idx) {
            string token = tokenStrings[idx].text;
            (*tokenLookup)[token] = tokenStrings[idx].type;
        }
    }
}

// Releases the shared token lookup map; safe to call more than once.
void Tokenizer::destroyGlobals() { start_func
    if (tokenLookup) {
        delete tokenLookup;
        tokenLookup = NULL;
    }
}

// Returns 1 when the read position has run past the last source line.
int Tokenizer::atEOF() { start_func
    return (row == source->end()) ? 1 : 0;
}

// Peeks at the character under the cursor without consuming it.
// Reading at or past the end of the current line yields '\0', which the
// tokenizer treats as a virtual end-of-line marker.
char Tokenizer::getCharacter() { start_func
    tokenizerAssert(row != source->end());
    return (col < rowLen) ? (*row)[col] : '\0';
}

// Advances the cursor one character; stepping past the virtual '\0' at
// the end of a line moves to the start of the next line.
void Tokenizer::moveNext() { start_func
    tokenizerAssert(row != source->end());
    ++col;
    if (col > rowLen) nextLine();
}

// Moves the cursor to column 0 of the following line, updating the
// cached line length (left untouched when we land on end-of-file).
void Tokenizer::nextLine() { start_func
    tokenizerAssert(row != source->end());
    ++row;
    ++rowNum;
    col = 0;
    if (row != source->end()) rowLen = (*row).size();
}

// Consumes and returns characters from the cursor up to (not including)
// the first character found in 'boundaries'. Throws int(1) if the cursor
// is already at end of line or no boundary occurs on this line; in that
// case the cursor is left unmoved.
string Tokenizer::grabUntil(const char* boundaries) throw_int { start_func
    tokenizerAssert(row != source->end());
    if (col >= rowLen) throw 1;
    string::size_type pos = (*row).find_first_of(boundaries, col);
    // Idiom fix: npos is the maximum size_type value, so the old
    // ">= string::npos" test could only ever mean equality
    if (pos == string::npos) throw 1;
    int prev = col;
    col = pos;
    return (*row).substr(prev, col - prev);
}

// Consumes and returns the run of characters at the cursor that all
// belong to 'charset' (possibly empty). If the run extends to the end of
// the line, everything up to end-of-line is consumed.
string Tokenizer::grabWhile(const char* charset) { start_func
    tokenizerAssert(row != source->end());
    string::size_type pos = (*row).find_first_not_of(charset, col);
    // Idiom fix: compare against npos with == (npos is the max value,
    // so the old >= test was an obscure way of writing equality)
    if (pos == string::npos) pos = rowLen;
    int prev = col;
    col = pos;
    return (*row).substr(prev, col - prev);
}

// Consumes and returns everything from the cursor to the end of the
// current line; returns the shared blank string if already at line end.
string Tokenizer::grabRestOfLine() { start_func
    tokenizerAssert(row != source->end());
    if (col >= rowLen) return blankString;
    string remainder = (*row).substr(col, rowLen - col);
    col = rowLen;
    return remainder;
}

// Produces the next token from the stream.
// Returns 1 and fills 'type'/'token' on success; returns 0 with
// type = TOKEN_NONE at end of file. Replays from the cache first when a
// bookmark rewind is active, and appends to the cache when recording.
// Side effects: updates errRow/errCol to the token's position for error
// reporting, and maintains the atNewLine / nextCloseBrace state used to
// synthesize and suppress end-of-line tokens around braces.
int Tokenizer::nextToken(int& type, string& token) { start_func
    // Replay mode: serve the next recorded token instead of scanning
    if (cacheReplay != cached.end()) {
        errRow = (*cacheReplay).rowN;
        errCol = (*cacheReplay).colN;
        type = (*cacheReplay).type;
        token = *((*cacheReplay).text);
        ++cacheReplay;

        // If at end of cache and not recording, clear
        if ((!cacheRecord) && (cacheReplay == cached.end())) {
            deallocRange(cached.begin(), cached.end());
            cached.clear();
            // Replay pointer is already at end
            tokenizerAssert(cacheReplay == cached.end());
        }

        return 1;
    }
    
    int debug = debugLevel() & DEBUG_TOKENIZE;

    // A '}' was held back by a previous call (see below)- emit it now
    if (nextCloseBrace) {
        type = nextCloseBrace;
        token = "}";
        nextCloseBrace = 0;
    }
    else {
        do {
            // EOF?
            if (atEOF()) {
                if (debug) debugWrite(DEBUG_TOKENIZE, "Token: END OF FILE");
                token = blankString;
                type = TOKEN_NONE;
                return 0;
            }
        
            // Clear any whitespace
            grabWhile(WHITE_SPACE);
            
            // Remember where this token starts for error messages
            errRow = rowNum;
            errCol = col;
    
            // Peek at next character to determine what sort of token to parse
            char tokenType = getCharacter();
    
            switch (tokenType) {
                case '\0':
                    // End of line
                    moveNext();
                    token = blankString;
                    type = TOKEN_ENDLINE;
                    
                    // Scan forward to see if a { coming up
                    // (lookahead only- uses copies of the cursor so the
                    // real position is not disturbed)
                    {
                        int sCol = col;
                        list<string>::const_iterator sRow = row;
                        string::size_type pos;
                        
                        for (;;) {
                            if (sRow == source->end()) break;
                            pos = (*sRow).find_first_not_of(WHITE_SPACE, sCol);
                            if (pos >= string::npos) {
                                sCol = 0;
                                ++sRow;
                                continue;
                            }
                            if ((*sRow)[pos] == '{') {
                                // { is coming up- force discard of endline token
                                // (the do-while below skips ENDLINE when atNewLine is set)
                                atNewLine = 1;
                            }
                            break;
                        }
                    }
                    break;
                    
                case '\'':
                    // Type string
                    token = blankString;
                    moveNext();
                    try {
                        token += grabUntil("\'");
                        moveNext();
                    }
                    catch (int) {
                        outputError("No type-string terminator found on same line (missing ')");
                        // RESOLUTION: treat remainder of line as type string
                        token += grabRestOfLine();
                    }
                    toLower(token);
                    type = TOKEN_STRINGTYPE;
                    break;
                    
                case '"':
                    // String
                    token = blankString;
                    moveNext();
                    try {
                        for (;;) {
                            // Stop at either the closing quote or a backslash
                            token += grabUntil("\"\\");
                            if (getCharacter() == '\\') {
                                // Escape sequences
                                moveNext();
                                switch (tokenType = getCharacter()) {
                                    case 'n':
                                        token += "\n";
                                        break;
                                    case 'r':
                                        token += "\r";
                                        break;
                                    case 't':
                                        token += "\t";
                                        break;
                                    default:
                                        outputWarning("Unrecognized escape sequence '\\%c' (to include a backslash in a string, use \\\\)", tokenType);
                                        // RESOLUTION: insert backslash and character verbatim
                                        token += "\\";
                                        // (fall through)
                                    case '\\':
                                    case '"':
                                        token += string(1, tokenType);
                                        break;
                                }
                                moveNext();
                            }
                            else {
                                // Closing quote- consume it and finish
                                moveNext();
                                break;
                            }
                        }
                    }
                    catch (int) {
                        outputError("No string terminator found on same line (missing \")");
                        // RESOLUTION: treat remainder of line as string
                        token += grabRestOfLine();
                    }
                    type = TOKEN_STRING;
                    break;
                    
                case '#':
                    // Configuration?
                    // (only recognized as a directive at the start of a command)
                    if (atNewLine) {
                        token = grabWhile("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789");
                        toLower(token);
                        type = TOKEN_CONFIG;
                        break;
                    }
                    
                    // (otherwise, fall through to normal tokenization)
    
                default:
                    if (((tokenType >= 'a') && (tokenType <= 'z')) ||
                        ((tokenType >= 'A') && (tokenType <= 'Z')) ||
                        (tokenType == '_')) {
                        // Identifier / keyword / etc
                        token = grabWhile("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789");
                        toLower(token);
                        type = TOKEN_IDENTIFIER;
                    
                        // Lookup token in map; otherwise an identifier
                        map<string, int>::iterator found = tokenLookup->find(token);
                        if (found != tokenLookup->end()) type = (*found).second;
                        
                        // obj_ is always reserved, not identifier
                        if (type == TOKEN_IDENTIFIER)
                            if (token.substr(0, 4) == string("obj_"))
                                type = TOKEN_RESERVED;
                    }
                    else if ((tokenType >= '0') && (tokenType <= '9')) {
                        // Number
                        token = grabWhile("0123456789");
                        // Special case- 0x
                        // (only a lone leading zero can begin a hex constant)
                        if ((token.size() == 1) && (tokenType == '0') && (tolower(getCharacter()) == 'x')) {
                            token += "x";
                            moveNext();
                            string add = grabWhile("0123456789abcdefABCDEF");
                            token += add;
                            toLower(token);
                            
                            if (add.size() == 0) {
                                outputError("Invalid hexadecimal constant '%s'", token.c_str());
                                // RESOLUTION: add a zero and continue
                                token += "0";
                            }
    
                            type = TOKEN_HEX;
                        }
                        // One decimal allowed
                        else if (getCharacter() == '.') {
                            token += ".";
                            moveNext();
                            string add = grabWhile("0123456789");
                            token += add;
                            
                            if (add.size() == 0) {
                                outputWarning("Invalid decimal constant '%s' (digits must appear before and after decimal point)", token.c_str());
                                // RESOLUTION: add a zero and continue
                                token += "0";
                            }
    
                            type = TOKEN_DECIMAL;
                        }
                        else {
                            type = TOKEN_INTEGER;
                        }
                        
                        // Check for invalid character sequence afterward
                        string mess = grabWhile("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789");
                        if (mess.size() != 0) {
                            outputError("Unrecognized character sequence '%s' at end of numeric constant (identifiers may not begin with a number)", mess.c_str());
                            // RESOLUTION: continue compiling, having discarded invalid characters
                        }
                    }
                    else {
                        // Operator / symbol / comment / etc
                        // Greedy match: keep extending the symbol while the
                        // extended string is still a known token
                        map<string, int>::iterator found;
                        token = string(1, tokenType);
                        do {
                            // Add to token until it no longer matches a symbol we want
                            moveNext();
                            token += string(1, tokenType = getCharacter());
                            found = tokenLookup->find(token);
                        } while ((tokenType != '\0') && (found != tokenLookup->end()));
                        
                        // The last character we added was not actually discarded, remove it
                        token = token.substr(0, token.size() - 1);
                        
                        // Determine token type
                        found = tokenLookup->find(token);
                        if (found == tokenLookup->end()) type = TOKEN_UNKNOWN;
                        else type = (*found).second;
                        
                        if (type == TOKEN_COMMENT_LINE) {
                            // Line comment consumes the rest of the line and
                            // reads as an end-of-line token
                            nextLine();
                            token = blankString;
                            type = TOKEN_ENDLINE;
                        }
                        
                        else if (type == TOKEN_COMMENT_BLOCK) {
                            // Scan character-by-character for the closing */
                            for (;;) {
                                if (atEOF()) {
                                    outputWarning("No end-of-comment marker found (missing */ symbol)");
                                    // RESOLUTION: treat as end-of-file
                                    if (debug) debugWrite(DEBUG_TOKENIZE, "Token: END OF FILE");
                                    token = blankString;
                                    type = TOKEN_NONE;
                                    return 0;
                                }
                                tokenType = getCharacter();
                                moveNext();
                                if ((tokenType == '*') && (getCharacter() == '/')) {
                                    moveNext();
                                    break;
                                }
                            }
                        }
                    }
                    
                    break;
            }
            
            // Loop if we get an endline right after a new line (skip blank lines/empty cmds)
            // Loop for comments also
        } while (((atNewLine) && (type == TOKEN_ENDLINE)) || (type == TOKEN_COMMENT_BLOCK));
    }
    
    // Turn into a new line if before a }
    // (the } itself is held in nextCloseBrace and emitted on the next call)
    if ((type == TOKEN_CLOSE_BRACE) && (!atNewLine)) {
        nextCloseBrace = TOKEN_CLOSE_BRACE;
        type = TOKEN_ENDLINE; // Will set atNewLine below
    }
    
    // At a new line for next time?
    // Hide newlines after a { or }
    if ((type == TOKEN_ENDLINE) || (type == TOKEN_CLOSE_BRACE) || (type == TOKEN_OPEN_BRACE)) atNewLine = 1;
    else atNewLine = 0;
    
    // Debug?
    if (debug) {
        if (type & TOKEN_KEYWORD) debugWrite(DEBUG_TOKENIZE, "Token: KEYWORD - %s", token.c_str());
        else if (type & TOKEN_OPERATOR) debugWrite(DEBUG_TOKENIZE, "Token: OPERATOR - %s", token.c_str());
        else debugWrite(DEBUG_TOKENIZE, "Token: %s - %s", debugText[type], token.c_str());
    }

    // Recording for bookmark rewind- append this token to the cache
    if (cacheRecord) {
        Token recorded;        
        recorded.type = type;
        recorded.text = new string(token);
        recorded.rowN = errRow;
        recorded.colN = errCol;
        cached.push_back(recorded);

        // A bookmark stored while the cache was empty points at end();
        // retarget such bookmarks at the first recorded token
        if (bookmarkNew) {
            for (map<int, list<Token>::iterator>::iterator pos = bookmarks.begin(); pos != bookmarks.end(); ++pos) {
                if ((*pos).second == cached.end()) --(*pos).second;
            }
            bookmarkNew = 0;
        }        

        // Replay pointer is already at end
        tokenizerAssert(cacheReplay == cached.end());
    }

    return 1;
}

void Tokenizer::skipToken() { start_func
    if (cacheReplay != cached.end()) {
        ++cacheReplay;

        // If at end of cache and not recording, clear
        if ((!cacheRecord) && (cacheReplay == cached.end())) {
            deallocRange(cached.begin(), cached.end());
            cached.clear();
            // Replay pointer is already at end
            tokenizerAssert(cacheReplay == cached.end());
        }
    }
    else {
        int type;
        string token;
        nextToken(type, token);
    }
}

// Returns the next token without consuming it.
// Same return convention as nextToken(): 1 on success, 0 at end of file.
// Implemented by fetching the token, ensuring it is in the cache, then
// backing the replay pointer up one slot so the next nextToken()/
// skipToken() serves the same token again.
int Tokenizer::peekToken(int& type, string& token) { start_func
    // Already replaying- just report the current cache entry, don't advance
    if (cacheReplay != cached.end()) {
        errRow = (*cacheReplay).rowN;
        errCol = (*cacheReplay).colN;
        type = (*cacheReplay).type;
        token = *((*cacheReplay).text);
        return 1;
    }

    if (nextToken(type, token)) {
        // Don't readd to cache if already recording
        // (when recording, nextToken() pushed it onto the cache itself)
        if (!cacheRecord) {
            Token peeked;        
            peeked.type = type;
            peeked.text = new string(token);
            peeked.rowN = errRow;
            peeked.colN = errCol;
            cached.push_back(peeked);
        }
        // Replay pointer is at end- move to next-to-last
        tokenizerAssert(cacheReplay == cached.end());
        --cacheReplay;
        return 1;
    }

    return 0;
}

// Stores a named bookmark at the current replay position and turns on
// token recording so the stream can later be rewound to this point.
// If the cache is currently empty the bookmark temporarily points at
// end(); bookmarkNew tells nextToken() to retarget it at the first token
// it records (a list's end iterator stays stable across push_back).
void Tokenizer::bookmarkStore(int name) { start_func
    bookmarks[name] = cacheReplay;
    if (cacheReplay == cached.end()) bookmarkNew = 1;
    cacheRecord = 1;
}

void Tokenizer::bookmarkReturn(int name) { start_func
    tokenizerAssert(bookmarks.find(name) != bookmarks.end());
    // Return to start of cache
    cacheReplay = (*(bookmarks.find(name))).second;
}

void Tokenizer::bookmarkCancel(int name) { start_func
    if (bookmarks.find(name) != bookmarks.end()) {
        bookmarks.erase(name);
        if (bookmarks.empty()) {
            cacheRecord = 0;
            // Clear anything in cache prior to current replay position
            deallocRange(cached.begin(), cacheReplay);
            cached.erase(cached.begin(), cacheReplay);
            tokenizerAssert(cacheReplay == cached.begin());
        }
    }
}

#define ERROR_BUFFER_SIZE 1024

void Tokenizer::outputError(const char* text, ...) { start_func
    va_list arglist;
    va_start(arglist, text);

    if (!silent) {
        if (!errBuffer) errBuffer = new char[ERROR_BUFFER_SIZE];
    
        vsnprintf(errBuffer, ERROR_BUFFER_SIZE, text, arglist);
        errBuffer[ERROR_BUFFER_SIZE - 1] = 0;
        
        // @TODO: Better output (debug window during gameplay; error window during editor)
        debugWrite("ERROR row %d col %d: %s", errRow + 1, errCol + 1, errBuffer);
    }
    
    ++errorCount;

    va_end(arglist);
}

void Tokenizer::outputWarning(const char* text, ...) { start_func
    va_list arglist;
    va_start(arglist, text);

    if (!silent) {
        if (!errBuffer) errBuffer = new char[ERROR_BUFFER_SIZE];
    
        vsnprintf(errBuffer, ERROR_BUFFER_SIZE, text, arglist);
        errBuffer[ERROR_BUFFER_SIZE - 1] = 0;
        
        // @TODO: Better output (debug window during gameplay; error window during editor)
        debugWrite("WARNING row %d col %d: %s", errRow + 1, errCol + 1, errBuffer);
    }

    ++warningCount;

    va_end(arglist);
}

// Enables (nonzero) or disables (zero) silent mode: errors and warnings
// are still counted but not written to the debug output.
void Tokenizer::silentErrors(int newSilent) { start_func
    silent = newSilent;
}

void Tokenizer::resetErrors() { start_func
    errorCount = 0;
    warningCount = 0;
    silent = 0;
}
